{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "import subprocess\n",
    "import sys\n",
    "import threading\n",
    "from queue import Queue, Empty\n",
    "\n",
    "import mlflow\n",
    "import mlflow.sklearn\n",
    "from mlflow.models.signature import infer_signature\n",
    "\n",
    "from cuml.metrics.accuracy import accuracy_score\n",
    "from cuml.preprocessing.model_selection import train_test_split\n",
    "from cuml.ensemble import RandomForestClassifier"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pull sample airline data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!wget -N https://rapidsai-cloud-ml-sample-data.s3-us-west-2.amazonaws.com/airline_small.parquet"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define data loader, using cuDF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_data(fpath):\n",
    "    \"\"\"\n",
    "    Simple helper function for loading data to be used by CPU/GPU models.\n",
    "\n",
    "    :param fpath: Path to the data to be ingested\n",
    "    :return: DataFrame wrapping the data at [fpath]. Data will be in either a Pandas or RAPIDS (cuDF) DataFrame\n",
    "    \"\"\"\n",
    "    import cudf\n",
    "\n",
    "    df = cudf.read_parquet(fpath)\n",
    "    X = df.drop([\"ArrDelayBinary\"], axis=1)\n",
    "    y = df[\"ArrDelayBinary\"].astype('int32')\n",
    "    \n",
    "    return train_test_split(X, y, test_size=0.2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define our training routine."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train(fpath, max_detph, max_features, n_estimators):\n",
    "    \"\"\"\n",
    "    :param fpath: Path or URL for the training data used with the model.\n",
    "    :max_detph: int Max tree depth\n",
    "    :max_features: float percentage of features to use in classification\n",
    "    :n_estimators: int number of trees to create\n",
    "    :return: Trained Model\n",
    "    \"\"\"\n",
    "    X_train, X_test, y_train, y_test = load_data(fpath)\n",
    "    mod = RandomForestClassifier(max_depth=max_depth, max_features=max_features, n_estimators=n_estimators)\n",
    "    acc_scorer = accuracy_score\n",
    "\n",
    "    mod.fit(X_train, y_train)\n",
    "    preds = mod.predict(X_test)\n",
    "    acc = acc_scorer(y_test, preds)\n",
    "\n",
    "    mlparams = {\"max_depth\": str(max_depth),\n",
    "                \"max_features\": str(max_features),\n",
    "                \"n_estimators\": str(n_estimators),\n",
    "                }\n",
    "    mlflow.log_params(mlparams)\n",
    "\n",
    "    mlmetrics = {\"accuracy\": acc}\n",
    "    mlflow.log_metrics(mlmetrics)\n",
    "\n",
    "    return mod, infer_signature(X_train.to_pandas(), y_train.to_pandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Implement our MLFlow training loop, and save our best model to the tracking server."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "conda_env = f'conda.yaml'\n",
    "fpath     = f'airline_small.parquet'\n",
    "\n",
    "max_depth = 10\n",
    "max_features = 0.75\n",
    "n_estimators = 500\n",
    "\n",
    "artifact_path = \"Airline-Demo\"\n",
    "artifact_uri = None\n",
    "experiment_name = \"RAPIDS-Notebook\"\n",
    "experiment_id = None\n",
    "\n",
    "mlflow.set_tracking_uri(uri='sqlite:////tmp/mlflow-db.sqlite')\n",
    "mlflow.set_experiment(experiment_name)\n",
    "\n",
    "with mlflow.start_run(run_name=\"(Notebook) RAPIDS-MLFlow\"): \n",
    "    model, signature = train(fpath, max_depth, max_features, n_estimators)\n",
    "        \n",
    "    mlflow.sklearn.log_model(model,\n",
    "                             signature=signature,\n",
    "                             artifact_path=artifact_path,\n",
    "                             registered_model_name=\"rapids-mlflow-notebook\",\n",
    "                             conda_env='conda.yaml')\n",
    "    \n",
    "    artifact_uri = mlflow.get_artifact_uri(artifact_path=artifact_path)\n",
    "print(artifact_uri)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Begin serving our trained model using MLFlow\n",
    "**Note:** The serving thread will continue to run after cell execution. Select the cell and click 'interrupt the kernel' to stop it."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "In a terminal, run: `mlflow models serve -m [artifact_uri] -p [port]`, you should see something similar to the following:\n",
    "\n",
    "```shell\n",
    "2020/07/27 13:59:49 INFO mlflow.models.cli: Selected backend for flavor 'python_function'\n",
    "2020/07/27 13:59:49 INFO mlflow.pyfunc.backend: === Running command 'source /anaconda3/bin/../etc/profile.d/conda.sh && conda activate mlflow-3335621df6011b1847d2555b195418d4496e5ffd 1>&2 && gunicorn --timeout=60 -b 127.0.0.1:5000 -w 1 ${GUNICORN_CMD_ARGS} -- mlflow.pyfunc.scoring_server.wsgi:app'\n",
    "[2020-07-27 13:59:50 -0600] [23779] [INFO] Starting gunicorn 20.0.4\n",
    "[2020-07-27 13:59:50 -0600] [23779] [INFO] Listening at: http://127.0.0.1:5000 (23779)\n",
    "[2020-07-27 13:59:50 -0600] [23779] [INFO] Using worker: sync\n",
    "[2020-07-27 13:59:50 -0600] [23788] [INFO] Booting worker with pid: 23788\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "### Make requests against the deployed model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import requests\n",
    "\n",
    "host='localhost'\n",
    "port='55755'\n",
    "\n",
    "headers = {\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"format\": \"pandas-split\"\n",
    "}\n",
    "\n",
    "data = { \n",
    "    \"columns\": [\"Year\", \"Month\", \"DayofMonth\", \"DayofWeek\", \"CRSDepTime\", \"CRSArrTime\", \"UniqueCarrier\",\n",
    "                \"FlightNum\", \"ActualElapsedTime\", \"Origin\", \"Dest\", \"Distance\", \"Diverted\"],\n",
    "    \"data\": [[1987, 10, 1, 4, 1, 556, 0, 190, 247, 202, 162, 1846, 0]]\n",
    "}\n",
    "\n",
    "## Pause to let server start\n",
    "time.sleep(5)\n",
    "\n",
    "while (True):\n",
    "    try:\n",
    "        resp = requests.post(url=\"http://%s:%s/invocations\" % (host, port), data=json.dumps(data), headers=headers)\n",
    "        print('Classification: %s' % (\"ON-Time\" if resp.text == \"[0.0]\" else \"LATE\"))\n",
    "        break\n",
    "    except Exception as e:\n",
    "        errmsg = \"Caught exception attempting to call model endpoint: %s\" % e\n",
    "        print(errmsg, end='')\n",
    "        print(\"Sleeping\")\n",
    "        time.sleep(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mlflow",
   "language": "python",
   "name": "mlflow"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}